package com.luchao.jsoup;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URL;

/**
 * Scrapes a NetEase (163.com) subscription-account page and prints, for every
 * article linked from it, the article title followed by the article body markup.
 *
 * <p>Source page: https://www.163.com/dy/media/T1479541016079.html
 */
public class HtmlParseUtilWangyi {
    /** Listing page of the subscription account whose articles are scraped. */
    private static final String MEDIA_PAGE_URL = "https://www.163.com/dy/media/T1479541016079.html";

    /** Timeout, in milliseconds, passed to jsoup for every page fetch. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Command-line entry point; runs the scrape once.
     *
     * @param args ignored
     * @throws IOException if the listing page or any article page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        wx();
    }

    /**
     * Fetches the listing page, then for each {@code <h2>} heading that contains a
     * link prints the link text (the article title), downloads the linked article
     * and prints the markup of its body container.
     *
     * <p>Headings without a usable link are skipped rather than aborting the run.
     *
     * @throws IOException if the listing page or any article page cannot be fetched
     */
    private static void wx() throws IOException {
        Document listing = Jsoup.parse(new URL(MEDIA_PAGE_URL), TIMEOUT_MILLIS);
        Elements headings = listing.getElementsByTag("h2");
        for (Element heading : headings) {
            // Let jsoup locate the anchor instead of slicing the serialized HTML:
            // substring/indexOf arithmetic throws on any heading that has no link.
            Element link = heading.select("a[href]").first();
            if (link == null) {
                continue;
            }

            // absUrl resolves relative and protocol-relative hrefs against the
            // listing page URL; it yields "" when no absolute URL can be built.
            String articleUrl = link.absUrl("href");
            if (articleUrl.isEmpty()) {
                continue;
            }

            System.out.println(link.text());

            Document article = Jsoup.parse(new URL(articleUrl), TIMEOUT_MILLIS);
            // "post_body" is not an HTML tag name, so a tag lookup can never match;
            // look it up as a CSS class instead.
            // NOTE(review): assumes article pages mark the body with
            // class="post_body" - confirm against the live markup.
            Elements articleBody = article.getElementsByClass("post_body");
            System.out.println(articleBody.toString());
        }
    }
}
